## Download the PlayersBBall.csv dataset into RStudio. This data set contains every NBA basketball player from 1950 to the present. It contains their height, weight, position and the years they played (among other data). (Position: F – Forward, C – Center, F-C and C-F – Forward/Center, G – Guard, F-G – Forward/Guard.) FYI: If you feel that these questions are open-ended or at least a little vague, this is on purpose. Answer each question as you understand it, make any assumptions you need to answer it, and record those assumptions. (3-5 hours)

# Load the player table from the working directory and preview its first rows.
hello <- read.csv(file = "PlayersBBall.csv")
head(hello, n = 6L)
##                  name year_start year_end position height weight
## 1      Alaa Abdelnaby       1991     1995      F-C   6-10    240
## 2     Zaid Abdul-Aziz       1969     1978      C-F    6-9    235
## 3 Kareem Abdul-Jabbar       1970     1989        C    7-2    225
## 4  Mahmoud Abdul-Rauf       1991     2001        G    6-1    162
## 5   Tariq Abdul-Wahad       1998     2003        F    6-6    223
## 6 Shareef Abdur-Rahim       1997     2008        F    6-9    225
##          birth_date                               college
## 1     June 24, 1968                       Duke University
## 2     April 7, 1946                 Iowa State University
## 3    April 16, 1947 University of California, Los Angeles
## 4     March 9, 1969            Louisiana State University
## 5  November 3, 1974             San Jose State University
## 6 December 11, 1976              University of California

# Use the PlayersBBall.csv dataset to visually represent (summarize) the number of players in each position.

# summary() only reports Length/Class/Mode for the character `position`
# column, so it gives neither a count per position nor a picture. Tabulate the
# position labels and draw them as a bar chart to answer the question visually.
# NOTE(review): rows with a blank position, if any, appear as an unlabeled bar.
position_counts <- table(hello$position)
barplot(
  position_counts,
  main = "number of players per position",
  xlab = "position",
  ylab = "number of players",
  col = "blue"
)

# Per-column overview of the whole table (kept last so its printed output
# directly follows this call).
summary(hello)
##      name             year_start      year_end      position        
##  Length:4550        Min.   :1947   Min.   :1947   Length:4550       
##  Class :character   1st Qu.:1969   1st Qu.:1973   Class :character  
##  Mode  :character   Median :1986   Median :1992   Mode  :character  
##                     Mean   :1985   Mean   :1989                     
##                     3rd Qu.:2003   3rd Qu.:2009                     
##                     Max.   :2018   Max.   :2018                     
##                                                                     
##     height              weight       birth_date          college         
##  Length:4550        Min.   :114.0   Length:4550        Length:4550       
##  Class :character   1st Qu.:190.0   Class :character   Class :character  
##  Mode  :character   Median :210.0   Mode  :character   Mode  :character  
##                     Mean   :208.9                                        
##                     3rd Qu.:225.0                                        
##                     Max.   :360.0                                        
##                     NA's   :6

# Use the dataset to visually investigate if the distribution of the weight of centers (C) is greater than the distribution of the weight of forwards (F).

# Histogram of weight across all players, regardless of position.
# The string arguments use plain ASCII double quotes: typographic (curly)
# quotes are not valid R string delimiters and stop the script from parsing.
hist(hello$weight, main = "weight of all players", col = "blue")

# Centers: keep only the rows whose position label is exactly "C",
# then preview the first rows of that subset.
centers <- subset(hello, subset = position == "C")
head(centers, n = 6L)
##                   name year_start year_end position height weight
## 3  Kareem Abdul-Jabbar       1970     1989        C    7-2    225
## 22        Steven Adams       2014     2018        C    7-0    255
## 33       Alexis Ajinca       2009     2017        C    7-2    248
## 36       Solomon Alabi       2011     2012        C    7-1    251
## 38         Gary Alcorn       1960     1961        C    6-9    225
## 40        Cole Aldrich       2011     2018        C   6-11    250
##          birth_date                               college
## 3    April 16, 1947 University of California, Los Angeles
## 22    July 20, 1993              University of Pittsburgh
## 33      May 6, 1988                                      
## 36   March 21, 1988              Florida State University
## 38  October 8, 1936   California State University, Fresno
## 40 October 31, 1988                  University of Kansas
# Per-column summary of the centers subset.
summary(object = centers)
##      name             year_start      year_end      position        
##  Length:502         Min.   :1947   Min.   :1947   Length:502        
##  Class :character   1st Qu.:1972   1st Qu.:1976   Class :character  
##  Mode  :character   Median :1993   Median :1998   Mode  :character  
##                     Mean   :1988   Mean   :1993                     
##                     3rd Qu.:2004   3rd Qu.:2009                     
##                     Max.   :2018   Max.   :2018                     
##                                                                     
##     height              weight       birth_date          college         
##  Length:502         Min.   :190.0   Length:502         Length:502        
##  Class :character   1st Qu.:230.0   Class :character   Class :character  
##  Mode  :character   Median :245.0   Mode  :character   Mode  :character  
##                     Mean   :244.6                                        
##                     3rd Qu.:256.0                                        
##                     Max.   :360.0                                        
##                     NA's   :2
# Histogram of the weights in the centers subset.
hist(x = centers$weight, col = "blue", main = "weight of centers")

# Forwards: keep only the rows whose position label is exactly "F",
# then preview the first rows of that subset.
forwards <- subset(hello, subset = position == "F")
head(forwards, n = 6L)
##                   name year_start year_end position height weight
## 5    Tariq Abdul-Wahad       1998     2003        F    6-6    223
## 6  Shareef Abdur-Rahim       1997     2008        F    6-9    225
## 7        Tom Abernethy       1977     1981        F    6-7    220
## 9       John Abramovic       1947     1948        F    6-3    195
## 14           Bud Acton       1968     1968        F    6-6    210
## 15          Quincy Acy       2013     2018        F    6-7    240
##           birth_date                        college
## 5   November 3, 1974      San Jose State University
## 6  December 11, 1976       University of California
## 7        May 6, 1954             Indiana University
## 9   February 9, 1919 Salem International University
## 14  January 11, 1942              Hillsdale College
## 15   October 6, 1990              Baylor University
# Per-column summary of the forwards subset.
summary(object = forwards)
##      name             year_start      year_end      position        
##  Length:1290        Min.   :1947   Min.   :1947   Length:1290       
##  Class :character   1st Qu.:1971   1st Qu.:1973   Class :character  
##  Mode  :character   Median :1989   Median :1992   Mode  :character  
##                     Mean   :1987   Mean   :1990                     
##                     3rd Qu.:2004   3rd Qu.:2010                     
##                     Max.   :2018   Max.   :2018                     
##                                                                     
##     height              weight       birth_date          college         
##  Length:1290        Min.   :165.0   Length:1290        Length:1290       
##  Class :character   1st Qu.:205.0   Class :character   Class :character  
##  Mode  :character   Median :219.0   Mode  :character   Mode  :character  
##                     Mean   :218.1                                        
##                     3rd Qu.:230.0                                        
##                     Max.   :285.0                                        
##                     NA's   :1
# Histogram of the weights in the forwards subset.
hist(x = forwards$weight, col = "green", main = "weight of forwards")

## how can I plot two hist at the same time?
## what about C-F?

#Use the dataset to visually investigate if the distribution of the height of centers (C) is greater than the distribution of the height of forwards (F).

#install.packages("tidyr")
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Heights are stored as "feet-inches" strings (e.g. "7-2"); split them into
# separate `ft` and `inch` columns so they can be combined into inches.
df <- separate(centers, col = height, into = c("ft", "inch"), sep = "\\-")

# Feet component as a number.
ft_numeric <- as.numeric(df[["ft"]])
head(ft_numeric)
## [1] 7 7 7 7 6 6

# Inch component as a number.
inch_numeric <- as.numeric(df[["inch"]])

# Total height in inches. The plotted expression is written inline because
# hist() derives its default x-axis label from it.
hist(
  ft_numeric * 12 + inch_numeric,
  col = "blue",
  main = "height of centers (inch)"
)

# Split the forwards' "feet-inches" height strings into `ft` and `inch`
# columns so they can be combined into a single height in inches.
dff <- separate(forwards, col = height, into = c("ft", "inch"), sep = "\\-")

# Feet component as a number.
ft_numeric <- as.numeric(dff[["ft"]])
head(ft_numeric)
## [1] 6 6 6 6 6 6

# Inch component as a number.
inch_numeric <- as.numeric(dff[["inch"]])
head(inch_numeric)
## [1] 6 9 7 3 6 7

# Total height in inches. The plotted expression is written inline because
# hist() derives its default x-axis label from it.
hist(
  ft_numeric * 12 + inch_numeric,
  col = "green",
  main = "height of forwards (inch)"
)

##Use the dataset to visually investigate if the distribution of height is different between any of the positions.

# install.packages("dplyr")
# library(dplyr)
# head(hello$position)
# count(hello, position)
# C = 502, F = 1290, G = 1574; let's check the height of G

# Guards: keep only the rows whose position label is exactly "G".
guards <- subset(hello, subset = position == "G")

# Split the "feet-inches" height strings into `ft` and `inch` columns.
dfff <- separate(guards, col = height, into = c("ft", "inch"), sep = "\\-")

# Feet component as a number.
ft_numeric <- as.numeric(dfff[["ft"]])
head(ft_numeric)
## [1] 6 6 6 6 6 6

# Inch component as a number.
inch_numeric <- as.numeric(dfff[["inch"]])
head(inch_numeric)
## [1] 1 3 5 0 4 5

# Total height in inches. The plotted expression is written inline because
# hist() derives its default x-axis label from it.
hist(
  ft_numeric * 12 + inch_numeric,
  col = "red",
  main = "height of guard (inch)"
)

## G<F<C